1.2 Google form analysis tests with sessions


In [ ]:
%run "../Functions/2. Google form analysis.ipynb"

(userIdThatDidNotAnswer) (userId1AnswerEN) (userIdAnswersEN) (userId1ScoreEN) (userIdScoresEN) (userId1AnswerFR) (userIdAnswersFR) (userId1ScoreFR) (userIdScoresFR) (userIdAnswersENFR)

getAnswerTemporality tinkering


In [ ]:
allGFormResponders = getAllResponders()
users that played more than once

In [ ]:
# List the responders whose number of RedMetrics sessions exceeds 2.
# NOTE(review): the heading says "more than once" but the threshold is > 2 — confirm which is intended.
for uid in allGFormResponders:
    # look the sessions up once per user instead of once for the test and once for the print
    sessionCount = len(getUserSessions(rmdf1522, uid))
    if sessionCount > 2:
        print(uid + ' ' + str(sessionCount))

In [ ]:
type(allGFormResponders)

In [ ]:
len(allGFormResponders)

In [ ]:
allUniqueGFormResponders = pd.Series(allGFormResponders).unique()
len(allUniqueGFormResponders)
users that answered more than once

In [ ]:
# Count the answers per user id, keep the ids that occur more than once,
# and expose those ids as a Series of values.
allMultipleGFormResponders = pd.Series(
    gform[localplayerguidkey]
    .value_counts()
    .loc[lambda answerCounts: answerCounts > 1]
    .index
)
#allMultipleGFormResponders

In [ ]:
testUserID = '"bfdfd356-5d6f-4696-a2f1-c1dc338aa64b"' # 4 sessions

In [ ]:
#testUserID = '"a7936587-8b71-43b6-9c61-17b2c2b55de3"' # 2 sessions

gform timestamps


In [ ]:
gformTimestamps = gform[gform[localplayerguidkey]==testUserID][QTimestamp]

In [ ]:
gform[gform[QTimestamp] > '2017/10/15'][QTimestamp].sort_values()

In [ ]:
pd.to_datetime('2017/10/16 2:14:12 PM GMT+1')

In [ ]:
pd.to_datetime('2017/10/16 2:14:12 PM GMT+1', utc=True)

In [ ]:
pd.to_datetime('2017/10/16 2:14:12 PM')

RedMetrics timestamps


In [ ]:
pd.to_datetime('2017-11-02T11:08:03.813Z')

In [ ]:
pd.to_datetime('2017-11-02T11:08:03.813Z', utc=True)

In [ ]:
rmdf1522\
[(rmdf1522['userTime']>pd.Timestamp('2017-10-26T14:37', tz='UTC'))\
& (rmdf1522['userTime']<pd.Timestamp('2017-10-26T14:38', tz='UTC'))]\
['userTime'].sort_values()
#.dropna(subset=['userTime'])

In [ ]:
#rmdf1522.query("('2017-10-24T08:15:11' < userTime < '2017-10-24T08:15:22')")

In [ ]:
rmdf1522[rmdf1522['userTime']>pd.Timestamp('2017-10-16T11:58:03', tz='UTC')]['userTime'].sort_values().head()

comparison

firstGameTime='2017-10-16 12:06:39.217000+00:00' dateGform='2017-10-16 14:04:27+00:00' dateGform='2017-10-16 15:22:25+00:00' firstGameTime='2017-10-16 11:58:03.987000+00:00' dateGform='2017-10-16 13:55:31+00:00' dateGform='2017-10-16 15:06:32+00:00'

67603 2017-10-16T12:06:39.217Z 18 2017/10/16 1:04:27 PM GMT+1 23 2017/10/16 2:22:25 PM GMT+1

66989 2017-10-16T11:58:03.987Z
13 2017/10/16 12:55:31 PM GMT+1 59 2017/10/16 2:06:32 PM GMT+1


In [ ]:
rmfirst1 = '2017-10-16T12:06:39.217Z'
gfa1before1 = '2017/10/16 1:04:27 PM GMT+1'
gfa1after1 = '2017/10/16 2:22:25 PM GMT+1'

rmfirst2 = '2017-10-16T11:58:03.987Z'      
gfa1before2 = '2017/10/16 12:55:31 PM GMT+1'
gfa1after2 = '2017/10/16 2:06:32 PM GMT+1'

In [ ]:
dt_rmfirst1 = pd.to_datetime(rmfirst1)
dt_gfa1before1 = pd.to_datetime(gfa1before1)
dt_gfa1after1 = pd.to_datetime(gfa1after1)

dt_rmfirst2 = pd.to_datetime(rmfirst2)
dt_gfa1before2 = pd.to_datetime(gfa1before2)
dt_gfa1after2 = pd.to_datetime(gfa1after2)

In [ ]:
dt_rmfirst1 = pd.to_datetime(rmfirst1)
dt_gfa1before1 = pd.to_datetime(gfa1before1)
dt_gfa1after1 = pd.to_datetime(gfa1after1)

dt_rmfirst2 = pd.to_datetime(rmfirst2)
dt_gfa1before2 = pd.to_datetime(gfa1before2)
dt_gfa1after2 = pd.to_datetime(gfa1after2)

In [ ]:
dt_rmfirst1utc = pd.to_datetime(rmfirst1, utc=True)
dt_gfa1before1utc = pd.to_datetime(gfa1before1).tz_localize('Europe/Berlin')
dt_gfa1after1utc = pd.to_datetime(gfa1after1).tz_localize('Europe/Berlin')

dt_rmfirst2utc = pd.to_datetime(rmfirst2, utc=True)
dt_gfa1before2utc = pd.to_datetime(gfa1before2).tz_localize('Europe/Berlin')
dt_gfa1after2utc = pd.to_datetime(gfa1after2).tz_localize('Europe/Berlin')

In [ ]:
[dt_rmfirst1,dt_gfa1before1,dt_gfa1after1,dt_rmfirst2,dt_gfa1before2,dt_gfa1after2]

In [ ]:
[dt_rmfirst1utc,dt_gfa1before1utc,dt_gfa1after1utc,dt_rmfirst2utc,dt_gfa1before2utc,dt_gfa1after2utc]

In [ ]:
[dt_rmfirst1utc>dt_gfa1before1utc,dt_rmfirst1utc<dt_gfa1after1utc,dt_rmfirst2utc>dt_gfa1before2utc,dt_rmfirst2utc<dt_gfa1after2utc]

In [ ]:
stamp = pd.to_datetime('2017-11-02T11:08:03.813Z', utc=True)
stamp

In [ ]:
stamp = pd.to_datetime('2017-11-02T11:08:03.813Z')
type(stamp)

In [ ]:
#stamp = stamp.tz_localize('America/Sao_Paulo')
stamp = stamp.tz_localize('GMT')
new_stamp = stamp.tz_convert('US/Eastern')
new_stamp

In [ ]:
stamp

In [ ]:
new_stamp

In [ ]:
for timestamp in gformTimestamps:
    print(timestamp)

In [ ]:
sessions = getUserSessions(rmdf1522, testUserID)
len(sessions)

In [ ]:
sessions

getFirstEventDate tinkering


In [ ]:
_userId = 'bfdfd356-5d6f-4696-a2f1-c1dc338aa64b'
_rmDF = rmdf1522

# Inline draft of getFirstEventDate( _userId, _rmDF = rmdf1522 ):
# finds the datetime of the user's first significant event.
# _userId is assumed to be in RedMetrics format.
# The result is a pandas Timestamp; it stays at the "max" sentinel
# when no session holds a significant event.
_sessions = getUserSessions(_rmDF, _userId)

# sentinel: later than any real event
_firstGameTime = pd.Timestamp.max.tz_localize('utc')

for session in _sessions:
    # keep this session's events that have a 'section'
    # (drops irrelevant events like 'start' and 'configure' ones)
    _timedEvents = _rmDF.loc[_rmDF['sessionId'] == session].dropna(subset=['section'])

    if len(_timedEvents) == 0:
        continue

    _earliest = _timedEvents['userTime'].min()
    if _earliest < _firstGameTime:
        _firstGameTime = _earliest

_firstGameTime

getBoundingEventDates tinkering


In [ ]:
_userId = 'bfdfd356-5d6f-4696-a2f1-c1dc338aa64b'
_rmDF = rmdf1522

# Inline draft of getBoundingEventDates( _userId, _rmDF = rmdf1522 ):
# finds the datetimes of the user's first and last significant events.
# _userId is assumed to be in RedMetrics format.
# The result is a pair of pandas Timestamps; each bound stays at its
# sentinel when no session holds a significant event.
_sessions = getUserSessions(_rmDF, _userId)

# sentinels: later / earlier than any real event
_firstGameTime = pd.Timestamp.max.tz_localize('utc')
_lastGameTime = pd.Timestamp.min.tz_localize('utc')

for session in _sessions:
    # keep this session's events that have a 'section'
    # (drops irrelevant events like 'start' and 'configure' ones)
    _timedEvents = _rmDF.loc[_rmDF['sessionId'] == session].dropna(subset=['section'])

    if len(_timedEvents) == 0:
        continue

    _earliest = _timedEvents['userTime'].min()
    _latest = _timedEvents['userTime'].max()
    if _earliest < _firstGameTime:
        _firstGameTime = _earliest
    if _latest > _lastGameTime:
        _lastGameTime = _latest

(_firstGameTime,_lastGameTime)

getTemporality tinkering/exploration


In [ ]:
# For every survey answer of _userId, print its timestamp followed by the
# temporality label: index 0 when strictly before the first game event,
# index 1 when strictly after, index 2 otherwise.
for timestamp in gform.loc[gform[localplayerguidkey] == _userId, QTimestamp]:
    print(timestamp)
    print(answerTemporalities[
        0 if timestamp < _firstGameTime
        else (1 if timestamp > _firstGameTime else 2)
    ])

In [ ]:
gform[gform[localplayerguidkey]==_userId][QTimestamp]

In [ ]:
# For each user who answered the survey more than once, print the bounds of
# their game events and the temporality label computed for each answer.
for userId in allMultipleGFormResponders:
    firstGameTime, lastGameTime = getBoundingEventDates(userId)

    debugLines = [
        '\n\n\nuid=' + str(userId),
        'first= ' + str(firstGameTime),
        'last=  ' + str(lastGameTime),
    ]

    gformTimestamps = gform.loc[gform[localplayerguidkey] == userId, QTimestamp]

    for dateGform in gformTimestamps:
        # Google forms Timestamps are GMT+1
        dateGform = dateGform.tz_convert('utc')
        if firstGameTime != pd.Timestamp.max.tz_localize('utc'):
            if dateGform <= firstGameTime:
                label = answerTemporalities[0]
            elif dateGform > firstGameTime:
                label = answerTemporalities[1]
            else:
                # only reached when neither comparison holds
                label = answerTemporalities[2]
        else:
            # first game time still equals the "max" sentinel
            label = answerTemporalities[2]
        debugLines.append(str(dateGform) + '=>' + str(label))

    strDebug = '\n'.join(debugLines)
    print(strDebug)

In [ ]:
gform[localplayerguidkey][12]

In [ ]:
gform[localplayerguidkey][50]

In [ ]:
gform[localplayerguidkey][50]

In [ ]:
getFirstEventDate(gform[localplayerguidkey][50])

In [ ]:
getUserSessions(rmdf1522, gform[localplayerguidkey][3])

In [ ]:
#for index in gform.index:
#    print(index)

In [ ]:
gform[QTimestamp][0]

In [ ]:
gform.loc[0][localplayerguidkey]

In [ ]:
print(gform[QTemporality][0])

In [ ]:
print(gform.loc[6,QTemporality])

In [ ]:
setAnswerTemporalities(gform)

extended getTemporality

functions

In [ ]:
def getFuncHasSameDate(thisDate):
    """Build a predicate that tells whether a timestamp falls on `thisDate`.

    thisDate: the date to compare against.
    Returns a one-argument function; given an object exposing `.date()`,
    it returns whether that date equals `thisDate`.
    """
    return lambda timestamp: timestamp.date() == thisDate

In [ ]:
def associateUserIdWithGFAnswer(candidateUserId, surveyAnswerIndex, surveyAnswerTemporality):
    """Link one candidate user id to a survey answer if that id is still unused.

    Reads and writes the module-level `_gformDFWithExtraColumn`.

    candidateUserId: user id to store in the 'userId' column.
    surveyAnswerIndex: index label of the survey answer row.
    surveyAnswerTemporality: value to store in the QTemporality column.

    Returns True when, after the call, the row holds both the candidate id
    and the given temporality; False otherwise.
    """
    # `x in series` tests the index labels, not the values, so the original
    # check never detected an id that was already linked; compare against values.
    alreadyLinked = _gformDFWithExtraColumn['userId'].isin([candidateUserId]).any()
    if not alreadyLinked:
        # candidate userId is available:
        # associate this candidate userId to the answer
        _gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] = candidateUserId
        _gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] = surveyAnswerTemporality
    else:
        print("candidate " + candidateUserId + " already in use")
    return ((_gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] == candidateUserId)\
            and (_gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] == surveyAnswerTemporality))

In [ ]:
_associateUserIdsWithGFAnswerProgress = FloatProgress(min=0, max=10)

def associateUserIdsWithGFAnswer(candidateUserIds, surveyAnswerIndex, surveyAnswerTemporality, isConsoleVerbose = False):
    """Link the first still-unused candidate user id to a survey answer.

    Reads and writes the module-level `_gformDFWithExtraColumn` and updates
    the `_associateUserIdsWithGFAnswerProgress` widget.

    candidateUserIds: iterable of candidate user ids, tried in order.
    surveyAnswerIndex: index label of the survey answer row.
    surveyAnswerTemporality: value stored in the QTemporality column on success.
    isConsoleVerbose: when True, prints what happened.

    Returns True as soon as one candidate has been linked, False when there
    is no candidate or every candidate is already linked to an answer.
    """
    if len(candidateUserIds) > 0:
        _associateUserIdsWithGFAnswerProgress.max = len(candidateUserIds)
        _associateUserIdsWithGFAnswerProgress.value = 0
        _associateUserIdsWithGFAnswerProgress.description='associateUserIdsWithGFAnswer ' + str(surveyAnswerIndex) + ' progress:'

        for candidate in candidateUserIds:
            _associateUserIdsWithGFAnswerProgress.value += 1
            # `x in series` tests the index labels, not the values, so the
            # original check treated every candidate as free; compare against values.
            alreadyLinked = _gformDFWithExtraColumn['userId'].isin([candidate]).any()
            if not alreadyLinked:
                if isConsoleVerbose:
                    print("candidate " + candidate + " available")
                # candidate userId is available:
                # associate this candidate userId to the answer
                _gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] = candidate
                _gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] = surveyAnswerTemporality
                return True
        # every candidate was taken; only the last one tried is reported
        if isConsoleVerbose:
            print("candidate " + candidate + " already in use")
    else:
        if isConsoleVerbose:
            print("no candidate userId")
    return False
data preparation

In [ ]:
# Working copy of the survey answers with an extra 'userId' column.
_gformDFWithExtraColumn = gform.copy()

# initialization of 'userId' column
# rows whose temporality is still undefined: the user id needs to be found
undefinedIndices = _gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]
# Build the column in one assignment: 0 where the user id needs to be found,
# the survey's own player id where it is known. Writing 0 first and strings
# afterwards through partial masks upcasts a numeric column, which recent
# pandas deprecates (PDEP-6).
_gformDFWithExtraColumn['userId'] = \
    _gformDFWithExtraColumn[localplayerguidkey].astype(object).where(~undefinedIndices, 0)

totalCount = len(_gformDFWithExtraColumn)
beforeCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[0]])
afterCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[1]])
undefinedCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]])

print(\
    "totalCount="+str(totalCount)+\
    "\nbeforeCounts="+str(beforeCounts)+\
    "\nafterCounts="+str(afterCounts)+\
    "\nundefinedCounts="+str(undefinedCounts)+\
    "\n"
     )

timestamp = pd.Timestamp.min
#surveyAnswer = _gformDFWithExtraColumn.loc[0,:]

In [ ]:
#for answerIndex in _gformDFWithExtraColumn.index:
#    if(_gformDFWithExtraColumn.loc[answerIndex,QTemporality] == answerTemporalities[2]):
#        print("answerIndex="+str(answerIndex))
#        print(_gformDFWithExtraColumn.loc[answerIndex,localplayerguidkey])
#        print(_gformDFWithExtraColumn.loc[answerIndex,QTimestamp])
#        #surveyAnswer = _gformDFWithExtraColumn.loc[answerIndex,:]
#        break

#timestamp = _gformDFWithExtraColumn.loc[answerIndex,QTimestamp]
#timestamp.date()

#rmdf1522.loc[0,'userTime'].date() == timestamp.date()
computation of new temporality

In [ ]:
#timestamp = _gformDFWithExtraColumn.loc[29,QTimestamp]
#timestamp

In [ ]:
#eventsOnThisDay['userTime'].dropna()

In [ ]:
# inputs:
#   answerIndex
#
# output:
#   _gformDFWithExtraColumn[answerIndex, 'userId'] may be set
#   _gformDFWithExtraColumn[answerIndex, QTemporality] may be set
#

answerIndex = 29
_rmdf = rmdf1522
isConsoleVerbose = False

#
# 1st pass: 1 hour immediate time neighbors
#   - 1h before and after, search for unlinked, matching, *unique*
#   - if can only be pretest or posttest, so be it
#   - if both possible, rely on survey answer
# 2nd pass:
#   - 1h before and after, search for unlinked, matching, *closest*
#   - as pretest, closest previous 'start'-type event with no later events and with no linked survey answer
#   or closest series of events with no linked survey answer
#   - as posttest, closest prior series of events with no linked survey answer
# 3rd pass:
#   - 1 day before and after, search for unlinked, matching, closest
# 4th pass:
#   - before and after, search for unlinked, matching, closest

_setAnswerTemporalityProgress = FloatProgress(min=0, max=5)

def _linkAnswerToCandidates(strictUserIds, looseUserIds, candidateKind, answerIndex, temporality, passNumber, isConsoleVerbose):
    """Pick the candidate list allowed by the pass and try to link the answer to it.

    strictUserIds / looseUserIds: the strict and the loose candidate Series.
    candidateKind: "Pretest" or "Posttest"; only used to build the log label.
    temporality: value handed to associateUserIdsWithGFAnswer.

    Pass 1 only accepts exactly one strict candidate; later passes use the
    strict candidates when there are any, the loose ones otherwise.
    """
    strictLabel = "strictPotential" + candidateKind + "UserIds"
    looseLabel = "potential" + candidateKind + "UserIds"

    if passNumber == 1:
        if len(strictUserIds) != 1:
            # ambiguous or empty: leave the answer for a later pass
            return
        candidates, label = strictUserIds, strictLabel
    elif len(strictUserIds) > 0:
        candidates, label = strictUserIds, strictLabel
    else:
        candidates, label = looseUserIds, looseLabel

    linked = associateUserIdsWithGFAnswer(candidates, answerIndex, temporality, isConsoleVerbose)
    if isConsoleVerbose:
        # failure means no candidate could be linked to this answer
        print(("success: " if linked else "fail: ") + label)


def setAnswerTemporality(answerIndex, _rmdf = rmdf1522, passNumber = 1, isConsoleVerbose = False):
    """Try to find the user id and the temporality of one undefined survey answer.

    Works on the module-level `_gformDFWithExtraColumn`: on success its
    'userId' and QTemporality cells for `answerIndex` are set (through
    associateUserIdsWithGFAnswer). Nothing is returned.

    answerIndex: index label of the survey answer.
    _rmdf: RedMetrics events; the columns 'userTime', 'type' and 'userId' are read.
    passNumber: 1 or 2 restricts events to less than 1 hour around the answer,
        3 to less than 1 day around it, any other value uses every event.
        Pass 1 additionally requires exactly one strict candidate.
    isConsoleVerbose: when True, prints the decisions taken.

    Candidate user ids:
      - pretest: ids with a 'start' event before the answer
        (strict: that also have events after the answer);
      - posttest: ids with events before the answer that are not pretest
        candidates (strict: that have no event after the answer).
    Ids already present in the 'userId' column are discarded. The answer is
    linked only when exactly one of the two kinds has candidates left.
    """
    _setAnswerTemporalityProgress.description='setAnswerTemporality ' + str(answerIndex) + ' progress:'
    if isConsoleVerbose:
        print("\n\nsetAnswerTemporality(" + str(answerIndex) + ")")

    # test if already set
    # NOTE(review): the column is addressed as "Temporality" here but as QTemporality
    # elsewhere — presumably the same key; confirm.
    if _gformDFWithExtraColumn.loc[answerIndex, "Temporality"] != answerTemporalities[2]:
        if isConsoleVerbose:
            print("temporality already set to '" + _gformDFWithExtraColumn.loc[answerIndex, "Temporality"] + "'")
        return

    # restart the per-answer progress bar; it was never reset, so it stayed
    # stuck at its maximum after the first answer
    _setAnswerTemporalityProgress.value = 0

    timestamp = _gformDFWithExtraColumn.loc[answerIndex,QTimestamp]
    answerUserId = _gformDFWithExtraColumn.loc[answerIndex, localplayerguidkey]

    _setAnswerTemporalityProgress.value += 1

    # restrict the events to the time window allowed by the pass
    eventsBounding = _rmdf
    if (passNumber == 1 or passNumber == 2):
        eventsBounding = _rmdf[abs(_rmdf['userTime'] - timestamp) < datetime.timedelta(hours = 1)]
    elif (passNumber == 3):
        eventsBounding = _rmdf[abs(_rmdf['userTime'] - timestamp) < datetime.timedelta(days = 1)]

    if isConsoleVerbose:
        if len(eventsBounding) == 0:
            print("no eventsBounding for user id '" + answerUserId + "'")
        else:
            print("found eventsBounding for user id '" + answerUserId + "'")

    _setAnswerTemporalityProgress.value += 1

    # closest events first on both sides of the answer
    eventsBefore = eventsBounding[(eventsBounding['userTime'] < timestamp)].sort_values(by='userTime', ascending=False)
    eventsAfter  = eventsBounding[(eventsBounding['userTime'] > timestamp)].sort_values(by='userTime', ascending=True)

    if isConsoleVerbose:
        if(len(eventsBefore) == 0):
            print("no eventsBefore around " + str(timestamp.date()) + " for user id '" + answerUserId + "'")
        if(len(eventsAfter) == 0):
            print("no eventsAfter around " + str(timestamp.date()) + " for user id '" + answerUserId + "'")

    _setAnswerTemporalityProgress.value += 1

    # explicit dtype: a bare pd.Series() relies on a default dtype that pandas warns about
    strictPotentialPretestUserIds = pd.Series(dtype=object)
    strictPotentialPosttestUserIds = pd.Series(dtype=object)

    # pretest candidates: user ids with a 'start' event before the survey answer;
    # strict ones also have events after it
    potentialPretestUserIds = pd.Series(eventsBefore[eventsBefore['type'] == 'start'].sort_values(by='userTime', ascending=False)['userId'].unique())
    if(len(potentialPretestUserIds) > 0):
        strictPotentialPretestUserIds = potentialPretestUserIds[potentialPretestUserIds.isin(eventsAfter['userId'])]

    # posttest candidates: user ids with events before the survey answer that are
    # not pretest candidates; strict ones have no event after it
    potentialPosttestUserIds = pd.Series(eventsBefore[~(eventsBefore['userId'].isin(potentialPretestUserIds))].sort_values(by='userTime', ascending=True)['userId'].unique())
    if(len(potentialPosttestUserIds) > 0):
        strictPotentialPosttestUserIds = pd.Series(potentialPosttestUserIds[~potentialPosttestUserIds.isin(eventsAfter['userId'])].unique())

    _setAnswerTemporalityProgress.value += 1

    # remove userIds that are already linked to a survey answer
    linkedUserIds = _gformDFWithExtraColumn['userId']
    potentialPretestUserIds        = potentialPretestUserIds[~potentialPretestUserIds.isin(linkedUserIds)]
    strictPotentialPretestUserIds  = strictPotentialPretestUserIds[~strictPotentialPretestUserIds.isin(linkedUserIds)]
    potentialPosttestUserIds       = potentialPosttestUserIds[~potentialPosttestUserIds.isin(linkedUserIds)]
    strictPotentialPosttestUserIds = strictPotentialPosttestUserIds[~strictPotentialPosttestUserIds.isin(linkedUserIds)]

    # booleans describing the type of survey answer
    isPotentialPretest = len(potentialPretestUserIds) > 0
    isPotentialPosttest = len(potentialPosttestUserIds) > 0

    _setAnswerTemporalityProgress.value += 1

    if(isPotentialPretest and not isPotentialPosttest):
        # definitely a pretest in any case.
        # which userId is linked to this survey answer?
        _linkAnswerToCandidates(strictPotentialPretestUserIds, potentialPretestUserIds, "Pretest",
                                answerIndex, answerTemporalities[0], passNumber, isConsoleVerbose)
    elif (isPotentialPosttest and not isPotentialPretest):
        # definitely a posttest in any case.
        # which userId is linked to this survey answer?
        _linkAnswerToCandidates(strictPotentialPosttestUserIds, potentialPosttestUserIds, "Posttest",
                                answerIndex, answerTemporalities[1], passNumber, isConsoleVerbose)
    else:
        # pretest or posttest?
        if isConsoleVerbose:
            print("couldn\'t determine pretest or posttest")

    if isConsoleVerbose:
        print("end temporality=" + _gformDFWithExtraColumn.loc[answerIndex, "Temporality"])
time tests

In [ ]:
# Count the RedMetrics events on the day of survey answer 29 and on the
# days just before and after it.
answerIndex = 29
timestamp = _gformDFWithExtraColumn.loc[answerIndex, QTimestamp]
timestampPrevDay, timestampNextDay = (
    timestamp.date() + datetime.timedelta(days = dayOffset) for dayOffset in (-1, 1)
)
print((timestampPrevDay, timestamp.date(), timestampNextDay))

# per day: keep the events of that day and drop the columns that are entirely empty
eventsOnPrevDay, eventsOnThisDay, eventsOnNextDay = [
    rmdf1522[rmdf1522['userTime'].apply(getFuncHasSameDate(day))].dropna(axis=1, how='all')
    for day in (timestampPrevDay, timestamp.date(), timestampNextDay)
]
print((len(eventsOnPrevDay), len(eventsOnThisDay), len(eventsOnNextDay)))

In [ ]:
#eventsOnPrevDay['userTime'].max(), eventsOnNextDay['userTime'].min()
test on all undefined

In [ ]:
_passProgress = FloatProgress(min=0, max=10)
def applyPass(passNumber):
    """Run setAnswerTemporality once on every answer that is still undefined.

    passNumber: forwarded to setAnswerTemporality; also shown in the
    `_passProgress` widget, which is reset and advanced once per answer.
    """
    answerIndices = _gformDFWithExtraColumn.index

    _passProgress.description = 'Pass ' + str(passNumber) + ' progress:'
    _passProgress.max = len(answerIndices)
    _passProgress.value = 0

    for answerIndex in answerIndices:
        _passProgress.value += 1
        # answers that already have a temporality are left alone
        if _gformDFWithExtraColumn.loc[answerIndex, QTemporality] != answerTemporalities[2]:
            continue
        setAnswerTemporality(answerIndex, _rmdf = rmdf1522, passNumber = passNumber)

In [ ]:
_gformDFWithExtraColumn = gform.copy()

def resetGFormWithExtraColumn():
    """Rebuild the module-level `_gformDFWithExtraColumn` from `gform`.

    The result is a copy of `gform` with an extra 'userId' column holding 0
    for answers whose temporality is undefined (user id still to be found)
    and the answer's own player id otherwise. Prints the number of answers
    per temporality. `gform` itself is not modified.
    """
    global _gformDFWithExtraColumn
    _gformDFWithExtraColumn = gform.copy()

    # initialization of 'userId' column
    # rows whose temporality is still undefined: the user id needs to be found
    undefinedIndices = _gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]
    # Build the column in one assignment. Writing 0 first and strings afterwards
    # through partial masks upcasts a numeric column, which recent pandas
    # deprecates (PDEP-6).
    _gformDFWithExtraColumn['userId'] = \
        _gformDFWithExtraColumn[localplayerguidkey].astype(object).where(~undefinedIndices, 0)

    totalCount = len(_gformDFWithExtraColumn)
    beforeCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[0]])
    afterCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[1]])
    undefinedCounts = len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]])

    print(\
        "totalCount="+str(totalCount)+\
        "\nbeforeCounts="+str(beforeCounts)+\
        "\nafterCounts="+str(afterCounts)+\
        "\nundefinedCounts="+str(undefinedCounts)+\
        "\n"
         )

In [ ]:
# Apply passes 1..stepsCount; each pass is repeated until it no longer
# changes the number of undefined answers.
resetGFormWithExtraColumn()

if answerTemporalities[2] in gform[QTemporality].values:

    _undefinedCount = IntText(0, description='undefined count:')
    _whileCount = IntText(0, description='while count:')
    display(_undefinedCount)
    display(_whileCount)

    # one column per pass with the number of answers per temporality
    result = pd.DataFrame(data=gform[QTemporality].value_counts())
    stepsCount = 4

    __progress = FloatProgress(min=0, max=stepsCount, description='Pass count:')
    display(__progress)
    display(_passProgress)
    display(_setAnswerTemporalityProgress)
    display(_associateUserIdsWithGFAnswerProgress)

    for passNumber in range(1, stepsCount + 1):
        # 0 forces at least one application while undefined answers remain
        previousValue = 0

        _whileCount.value = 0
        while True:
            # counted with a boolean sum: value_counts()[label] raises KeyError
            # once no undefined answer is left
            undefinedNow = int((_gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]).sum())
            if undefinedNow == previousValue:
                break
            _whileCount.value += 1
            previousValue = undefinedNow
            _undefinedCount.value = previousValue
            applyPass(passNumber)

        print("applied pass " + str(passNumber) + " " + str(_whileCount.value) + " times")

        __progress.value += 1
        result['pass ' + str(passNumber)] = _gformDFWithExtraColumn[QTemporality].value_counts()

    # a bare expression nested in an `if` is not rendered by the notebook
    display(result)

In [ ]:
#def getExtendedTemporality( answerDate, firstGameEventDate ):
#result = answerTemporalities[2]
#if(gameEventDate != pd.Timestamp.max.tz_localize('utc')):
#    if(answerDate <= gameEventDate):
#        result = answerTemporalities[0]
#    elif (answerDate > gameEventDate):
#        result = answerTemporalities[1]
#else:
#    # search for a close-enough session, even if it doesn't belong to the user
#    # check that this user doesn't already have survey answers
#    
#    # search for games starting just after
#    #   pretests are more frequent than posttests
#    eventsAfter = rmdf1522[rmdf1522['serverTime']]
#    
#    # search for games ending just before
#    
#    # search for any overlapping game
#    
#    # search for any overlapping game
#    
#
#result

def setAnswerTemporalities( _gformDF = gform ):
    """Fill the QTemporality column of `_gformDF`, in place.

    Does nothing unless the column currently holds a single distinct value
    (taken to mean that temporalities have not been set yet).

    For each answer the temporality is computed by getTemporality from the
    answer's timestamp and the user's first event date (getFirstEventDate).
    Per user, at most one answer keeps answerTemporalities[0] (the latest
    such answer) and at most one keeps answerTemporalities[1] (the earliest
    such answer); the other ones are set to answerTemporalities[2].

    NOTE(review): indentation was reconstructed from a flattened listing —
    confirm the nesting, in particular of the final print.
    """
    # check whether temporalities have already been set
    if(len(_gformDF[QTemporality].unique()) == 1):

        # format : key = _userId, value = [_firstEventDate, None or _gformDF.index of before, None or _gformDF.index of after]
        temporalities = {}

        for _index in _gformDF.index:
            _userId = _gformDF.loc[_index,localplayerguidkey]
            # None (not 0) marks "no such answer yet": 0 can be a real row index
            _firstEventDate, beforeIndex, afterIndex = [None, None, None]

            if _userId in temporalities:
                _firstEventDate, beforeIndex, afterIndex = temporalities[_userId]
            else:
                _firstEventDate = getFirstEventDate(_userId)

            temporality = getTemporality(_gformDF.loc[_index,QTimestamp],_firstEventDate)

            if temporality == answerTemporalities[0] and beforeIndex is not None:
                # two 'before' answers: keep the later one
                if _gformDF.loc[_index,QTimestamp] > _gformDF.loc[beforeIndex,QTimestamp]:
                    _gformDF.loc[beforeIndex,QTemporality] = answerTemporalities[2]
                else:
                    temporality = answerTemporalities[2]
            elif temporality == answerTemporalities[1] and afterIndex is not None:
                # two 'after' answers: keep the earlier one
                if _gformDF.loc[_index,QTimestamp] < _gformDF.loc[afterIndex,QTimestamp]:
                    _gformDF.loc[afterIndex,QTemporality] = answerTemporalities[2]
                else:
                    temporality = answerTemporalities[2]

            _gformDF.loc[_index,QTemporality] = temporality
            if temporality == answerTemporalities[0]:
                beforeIndex = _index
            elif temporality == answerTemporalities[1]:
                afterIndex = _index

            temporalities[_userId] = [_firstEventDate, beforeIndex, afterIndex]
        print("temporalities set")